In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Enumerate the Kaggle input mount and print the full path of each file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
/kaggle/input/apps-user-used-the-most/apriori_data.csv

Apriori Algorithm Intuition:

What are the three essential relations between the support, confidence and lift?

Given two Apps A1 and A2, here are the three essential relations to remember:

Relation between the support and the confidence: $\text{confidence}(A_1 \rightarrow A_2) = \dfrac{\text{support}(A_1, A_2)}{\text{support}(A_1)}$

Relation between the lift and the support: $\text{lift}(A_1 \rightarrow A_2) = \dfrac{\text{support}(A_1, A_2)}{\text{support}(A_1) \cdot \text{support}(A_2)}$

Relation between the lift and the confidence (consequence of the two previous equations): $\text{lift}(A_1 \rightarrow A_2) = \dfrac{\text{confidence}(A_1 \rightarrow A_2)}{\text{support}(A_2)}$

Importing Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Importing Data

In [3]:
# Load the raw data: a single "AppName" column whose cells are comma-separated app lists.
dataset = pd.read_csv('/kaggle/input/apps-user-used-the-most/apriori_data.csv')
# The frame is small (31 rows), so it is displayed in full rather than via head().
dataset
Out[3]:
AppName
0 Whatsapp,Youtube,Meet,Amazon,Instagram
1 Discord,Netflix,Instagram,Spotify
2 Whatsapp,Youtube,Zoom,Netflix,Facebook,SnapCha...
3 Whatsapp,Youtube,Meet
4 Whatsapp,Youtube,Meet,Netflix,Instagram
5 Youtube,Meet,SnapChat,Instagram,Spotify
6 Whatsapp,Youtube,Meet,SnapChat,Instagram
7 Whatsapp,Youtube,Zoom,Netflix,SnapChat,Instagram
8 Whatsapp,Youtube,Meet,Netflix,Facebook,Amazon ...
9 Whatsapp,Youtube,Meet,Netflix,Instagram
10 Whatsapp,Youtube,SnapChat,Instagram,Spotify
11 Whatsapp,Youtube,Twitter,Pinterest,Spotify
12 Whatsapp,Youtube,Twitter,Meet,Instagram
13 Whatsapp,Youtube,Meet,Netflix,Instagram
14 Whatsapp,Youtube,Facebook,Amazon,Instagram
15 Whatsapp,Youtube,Twitter,Meet,Spotify
16 Whatsapp,Youtube,Netflix,SnapChat,Instagram
17 Whatsapp,Youtube,Meet,Facebook,Wynk
18 Whatsapp,Youtube,Discord,Meet
19 Whatsapp,Youtube,Twitter
20 Whatsapp,Youtube,Zoom,Hotstar,Amazon
21 Whatsapp,Youtube,Meet,Spotify
22 Whatsapp,Youtube,Meet,Amazon Prime,Instagram
23 Whatsapp,Youtube,Facebook,Instagram
24 Youtube,Meet,SnapChat,Amazon,Instagram
25 Whatsapp,Youtube,Pinterest,Amazon,Instagram
26 Whatsapp,Youtube,Twitter,Meet,Zoom,Facebook
27 Whatsapp,Youtube,Meet,Amazon Prime,Instagram
28 Whatsapp,Youtube,Meet,SnapChat,Amazon,Amazon P...
29 Whatsapp,Youtube,Meet,Amazon,Instagram
30 Meet,SnapChat,Instagram,Spotify

Split each comma-separated string into a list of app names

In [4]:
# Turn each comma-separated "AppName" string into a list of app names, then
# rebuild the frame with one app per cell. Rows with fewer apps than the
# longest row are right-padded with None.
# NOTE: `dataset` is rebound here, so this cell cannot be re-run on its own;
# the "AppName" column no longer exists after the first run.
df = [app_string.split(',') for app_string in dataset["AppName"]]
dataset = pd.DataFrame(df)

EDA - Exploratory Data Analysis

In [5]:
# Preview the first five rows; shorter transactions are right-padded with None.
dataset.head()
Out[5]:
0 1 2 3 4 5 6 7 8 9
0 Whatsapp Youtube Meet Amazon Instagram None None None None None
1 Discord Netflix Instagram Spotify None None None None None None
2 Whatsapp Youtube Zoom Netflix Facebook SnapChat Amazon Amazon Prime Instagram Spotify
3 Whatsapp Youtube Meet None None None None None None None
4 Whatsapp Youtube Meet Netflix Instagram None None None None None
In [6]:
# Preview the last five rows.
dataset.tail()
Out[6]:
0 1 2 3 4 5 6 7 8 9
26 Whatsapp Youtube Twitter Meet Zoom Facebook None None None None
27 Whatsapp Youtube Meet Amazon Prime Instagram None None None None None
28 Whatsapp Youtube Meet SnapChat Amazon Amazon Prime Instagram None None None
29 Whatsapp Youtube Meet Amazon Instagram None None None None None
30 Meet SnapChat Instagram Spotify None None None None None None
In [7]:
# (rows, columns): 31 transactions, at most 10 apps in any one transaction.
dataset.shape
Out[7]:
(31, 10)
In [8]:
# Every column holds app-name strings (or None padding), hence dtype object.
dataset.dtypes
Out[8]:
0    object
1    object
2    object
3    object
4    object
5    object
6    object
7    object
8    object
9    object
dtype: object
In [9]:
# Missing cells per column; these come from padding shorter transactions, not from bad data.
dataset.isnull().sum()
Out[9]:
0     0
1     0
2     0
3     2
4     7
5    26
6    29
7    30
8    30
9    30
dtype: int64
In [10]:
# Per-column non-null counts, dtypes and memory footprint in one summary.
dataset.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       31 non-null     object
 1   1       31 non-null     object
 2   2       31 non-null     object
 3   3       29 non-null     object
 4   4       24 non-null     object
 5   5       5 non-null      object
 6   6       2 non-null      object
 7   7       1 non-null      object
 8   8       1 non-null      object
 9   9       1 non-null      object
dtypes: object(10)
memory usage: 2.5+ KB

Exporting the DataFrame to a CSV file

In [11]:
# Persist the one-app-per-cell frame without a header row or index so the file
# holds only app names; padding cells are written as empty fields.
# `header=False` is the documented boolean spelling of "no header row".
dataset.to_csv('apps_dataset.csv', header=False, index=False)

Data Preprocessing

In [12]:
# Re-read the exported file. header=None because the file has no header row,
# so columns are numbered 0..9 and blank cells come back as NaN.
dataset = pd.read_csv('./apps_dataset.csv', header = None)
# Build one list of app names per row for apyori. Blank cells come back from
# the CSV as NaN; they are skipped so that no artificial 'nan' item is added to
# every short transaction. Iterating over the frame itself also removes the
# hard-coded 31 x 10 shape.
transactions = [
    [str(app) for app in row if pd.notna(app)]
    for row in dataset.itertuples(index=False, name=None)
]
In [13]:
# After the CSV round trip the padding cells read back as NaN instead of None.
dataset.head()
Out[13]:
0 1 2 3 4 5 6 7 8 9
0 Whatsapp Youtube Meet Amazon Instagram NaN NaN NaN NaN NaN
1 Discord Netflix Instagram Spotify NaN NaN NaN NaN NaN NaN
2 Whatsapp Youtube Zoom Netflix Facebook SnapChat Amazon Amazon Prime Instagram Spotify
3 Whatsapp Youtube Meet NaN NaN NaN NaN NaN NaN NaN
4 Whatsapp Youtube Meet Netflix Instagram NaN NaN NaN NaN NaN

Training the Apriori model on the dataset

In [14]:
!pip install apyori
Collecting apyori
  Downloading apyori-1.1.2.tar.gz (8.6 kB)
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py) ... - \ done
  Created wheel for apyori: filename=apyori-1.1.2-py3-none-any.whl size=5975 sha256=bd694a891c0141b285ec20398b01830d1691a0b16cfd11a71b4f952e39baa93b
  Stored in directory: /root/.cache/pip/wheels/cb/f6/e1/57973c631d27efd1a2f375bd6a83b2a616c4021f24aab84080
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2
WARNING: Running pip as root will break packages and permissions. You should install packages reliably by using venv: https://pip.pypa.io/warnings/venv
In [15]:
from apyori import apriori

# Mine pairwise association rules.
# - min_support=0.03: with 31 transactions this admits any itemset that occurs
#   at least once (1/31 is about 0.032), so low-support rules rest on one row.
# - min_confidence=0.2: the rule must hold in at least 20% of the transactions
#   that contain its base item.
# - min_lift=2: the pair must co-occur at least twice as often as independence
#   would predict.
# - max_length=2: restrict itemsets to pairs.
# The former `min_length=2` argument was dropped: apyori 1.1.2 does not read
# that keyword, so it had no effect on the result.
rules = apriori(
    transactions=transactions,
    min_support=0.03,
    min_confidence=0.2,
    min_lift=2,
    max_length=2,
)

Visualising the results

Displaying the first results coming directly from the output of the apriori function

In [16]:
# Materialise the rules into a list so they can be displayed and iterated more than once.
results=list(rules)
In [17]:
# Show the raw RelationRecord objects before they are flattened into a table.
results
Out[17]:
[RelationRecord(items=frozenset({'Hotstar', 'Amazon'}), support=0.03225806451612903, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Hotstar'}), items_add=frozenset({'Amazon'}), confidence=1.0, lift=3.875)]),
 RelationRecord(items=frozenset({'Facebook', 'Amazon Prime'}), support=0.06451612903225806, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Amazon Prime'}), items_add=frozenset({'Facebook'}), confidence=0.4, lift=2.066666666666667), OrderedStatistic(items_base=frozenset({'Facebook'}), items_add=frozenset({'Amazon Prime'}), confidence=0.3333333333333333, lift=2.0666666666666664)]),
 RelationRecord(items=frozenset({'Wynk', 'Facebook'}), support=0.03225806451612903, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Wynk'}), items_add=frozenset({'Facebook'}), confidence=1.0, lift=5.166666666666667)]),
 RelationRecord(items=frozenset({'Zoom', 'Facebook'}), support=0.06451612903225806, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Facebook'}), items_add=frozenset({'Zoom'}), confidence=0.3333333333333333, lift=2.5833333333333335), OrderedStatistic(items_base=frozenset({'Zoom'}), items_add=frozenset({'Facebook'}), confidence=0.5, lift=2.5833333333333335)]),
 RelationRecord(items=frozenset({'Zoom', 'Hotstar'}), support=0.03225806451612903, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Hotstar'}), items_add=frozenset({'Zoom'}), confidence=1.0, lift=7.75), OrderedStatistic(items_base=frozenset({'Zoom'}), items_add=frozenset({'Hotstar'}), confidence=0.25, lift=7.75)]),
 RelationRecord(items=frozenset({'Pinterest', 'Twitter'}), support=0.03225806451612903, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Pinterest'}), items_add=frozenset({'Twitter'}), confidence=0.5, lift=3.1), OrderedStatistic(items_base=frozenset({'Twitter'}), items_add=frozenset({'Pinterest'}), confidence=0.2, lift=3.1)])]

Organising the results into a pandas DataFrame

In [18]:
def inspect(results):
    """Flatten relation records into plain rows for tabular display.

    Only the first ordered statistic of each record is used, and only one
    element is taken from its base and add item sets, so every record yields
    exactly one row (a record's reverse rule, if any, is not reported).

    Args:
        results: List of records indexable as ``record[1]`` (support) and
            ``record[2]`` (list of ordered statistics, each laid out as
            ``(items_base, items_add, confidence, lift)``).

    Returns:
        A list of ``(app_1, app_2, support, confidence, lift)`` tuples, one per
        record, in input order.
    """
    rows = []
    for record in results:
        first_stat = record[2][0]
        base_app = tuple(first_stat[0])[0]
        added_app = tuple(first_stat[1])[0]
        rows.append((base_app, added_app, record[1], first_stat[2], first_stat[3]))
    return rows
# Tabulate the flattened rules, one row per relation record.
final_result = pd.DataFrame(
    inspect(results),
    columns=['App 1', 'App 2', 'Support', 'Confidence', 'Lift'],
)
In [19]:
# Display the tabulated rules.
final_result
Out[19]:
App 1 App 2 Support Confidence Lift
0 Hotstar Amazon 0.032258 1.000000 3.875000
1 Amazon Prime Facebook 0.064516 0.400000 2.066667
2 Wynk Facebook 0.032258 1.000000 5.166667
3 Facebook Zoom 0.064516 0.333333 2.583333
4 Hotstar Zoom 0.032258 1.000000 7.750000
5 Pinterest Twitter 0.032258 0.500000 3.100000

Displaying the results sorted by descending lifts

In [20]:
# Rank the rules by lift; n=10 exceeds the 6 available rows, so every rule is returned in descending order.
final_result.nlargest(n = 10, columns = 'Lift')
Out[20]:
App 1 App 2 Support Confidence Lift
4 Hotstar Zoom 0.032258 1.000000 7.750000
2 Wynk Facebook 0.032258 1.000000 5.166667
0 Hotstar Amazon 0.032258 1.000000 3.875000
5 Pinterest Twitter 0.032258 0.500000 3.100000
3 Facebook Zoom 0.064516 0.333333 2.583333
1 Amazon Prime Facebook 0.064516 0.400000 2.066667

Conclusion

In [21]:
# Strip the spaces from the app column names so they can be used as attributes (final.App1).
final = final_result.rename(columns={'App 1': 'App1', 'App 2': 'App2'})
final
Out[21]:
App1 App2 Support Confidence Lift
0 Hotstar Amazon 0.032258 1.000000 3.875000
1 Amazon Prime Facebook 0.064516 0.400000 2.066667
2 Wynk Facebook 0.032258 1.000000 5.166667
3 Facebook Zoom 0.064516 0.333333 2.583333
4 Hotstar Zoom 0.032258 1.000000 7.750000
5 Pinterest Twitter 0.032258 0.500000 3.100000
In [22]:
# Correlate only the numeric rule metrics. Calling corr() on the whole frame
# relies on pandas silently dropping the string columns (App1, App2); from
# pandas 2.0 onwards that call raises instead.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    data=final[['Support', 'Confidence', 'Lift']].corr(),
    annot=True,
    cmap='copper_r',
    ax=ax,
)
Out[22]:
<AxesSubplot:>
In [23]:
# Plot each rule as a point: base app on the x-axis, associated app on the y-axis.
fig, ax = plt.subplots()
ax.scatter(final['App1'], final['App2'])
ax.set_xlabel('App 1')
ax.set_ylabel('App 2')
ax.set_title('App Recommended')
Out[23]:
Text(0.5, 1.0, 'App Recommended')